1 /* 2 * Copyright (C) 2009 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.common.html; 18 19 import com.google.common.annotations.Beta; 20 import com.google.common.annotations.GwtCompatible; 21 import com.google.common.escape.Escaper; 22 import com.google.common.escape.Escapers; 23 24 /** 25 * {@code Escaper} instances suitable for strings to be included in HTML 26 * attribute values and <em>most</em> elements' text contents. When possible, 27 * avoid manual escaping by using templating systems and high-level APIs that 28 * provide autoescaping. 29 * 30 * <p>HTML escaping is particularly tricky: For example, <a 31 * href="http://goo.gl/5TgZb">some elements' text contents must not be HTML 32 * escaped</a>. As a result, it is impossible to escape an HTML document 33 * correctly without domain-specific knowledge beyond what {@code HtmlEscapers} 34 * provides. We strongly encourage the use of HTML templating systems. 35 * 36 * @author Sven Mawson 37 * @author David Beaumont 38 * @since 15.0 39 */ 40 @Beta 41 @GwtCompatible 42 public final class HtmlEscapers { 43 /** 44 * Returns an {@link Escaper} instance that escapes HTML metacharacters as 45 * specified by <a href="http://www.w3.org/TR/html4/">HTML 4.01</a>. The 46 * resulting strings can be used both in attribute values and in <em>most</em> 47 * elements' text contents, provided that the HTML document's character 48 * encoding can encode any non-ASCII code points in the input (as UTF-8 and 49 * other Unicode encodings can). 50 * 51 * 52 * <p><b>Note:</b> This escaper only performs minimal escaping to make content 53 * structurally compatible with HTML. Specifically, it does not perform entity 54 * replacement (symbolic or numeric), so it does not replace non-ASCII code 55 * points with character references. This escaper escapes only the following 56 * five ASCII characters: {@code '"&<>}. 57 */ 58 public static Escaper htmlEscaper() { 59 return HTML_ESCAPER; 60 } 61 62 // For each xxxEscaper() method, please add links to external reference pages 63 // that are considered authoritative for the behavior of that escaper. 64 65 private static final Escaper HTML_ESCAPER = 66 Escapers.builder() 67 .addEscape('"', """) 68 // Note: "'" is not defined in HTML 4.01. 69 .addEscape('\'', "'") 70 .addEscape('&', "&") 71 .addEscape('<', "<") 72 .addEscape('>', ">") 73 .build(); 74 75 private HtmlEscapers() {} 76 }